knitr::opts_chunk$set(echo = TRUE)
library(knitr)
library(ggplot2)
library(reshape2)
library(caret)
## Loading required package: lattice
library(doParallel)
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(caTools)
#library(pls) # pls
library(MASS) #lda
library(naivebayes) # naivebayes
## naivebayes 0.9.6 loaded
library(ipred); library(plyr); library(e1071) # bagged tree
library(randomForest) #rf
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
library(gbm) # gbm
## Loaded gbm 2.1.5
#library(xgboost) #xgboost
#Data Input, preparation and descriptives ##Import the dataframe of patient parameter and NGC expression
# Import the patient-parameter + miRNA (NGC) expression table. Tab-delimited,
# one row per patient; strings are kept as character (not factors).
ctmmMir <- read.delim(file = "CTMM_miR_all.txt", stringsAsFactors = FALSE)
# Quick structure check of the first clinical columns
str(ctmmMir[,1:6])
## 'data.frame': 396 obs. of 6 variables:
## $ Sorting.index : int 313 4 5 6 316 7 29 30 336 337 ...
## $ Numeric_Pat_code: int 9 10 12 14 15 16 57 58 59 60 ...
## $ Hospital : int 1 1 1 1 1 1 1 1 1 1 ...
## $ CC_number : int 10045 10046 10048 10050 10051 10052 10107 10108 10109 10110 ...
## $ Sex : int 0 1 1 1 0 1 1 0 1 0 ...
## $ Race : int 1 1 1 1 1 1 1 1 1 1 ...
# Render the first rows as a markdown table
kable(head(ctmmMir[,1:6]))
| Sorting.index | Numeric_Pat_code | Hospital | CC_number | Sex | Race |
|---|---|---|---|---|---|
| 313 | 9 | 1 | 10045 | 0 | 1 |
| 4 | 10 | 1 | 10046 | 1 | 1 |
| 5 | 12 | 1 | 10048 | 1 | 1 |
| 6 | 14 | 1 | 10050 | 1 | 1 |
| 316 | 15 | 1 | 10051 | 0 | 1 |
| 7 | 16 | 1 | 10052 | 1 | 1 |
names(ctmmMir)
## [1] "Sorting.index" "Numeric_Pat_code"
## [3] "Hospital" "CC_number"
## [5] "Sex" "Race"
## [7] "age" "Length_cm"
## [9] "Weight" "bmi"
## [11] "BMI_quartile" "Heart_rate"
## [13] "Blood_Pressure_systolic" "Systolic_Quartile"
## [15] "Diastolic_Quartile" "Blood_Pressure_diastolic"
## [17] "Pres_diag" "Conf_Diagn"
## [19] "NYHA" "Prev_HF"
## [21] "Prev_MI" "Prev_PTCA"
## [23] "Prev_CABG" "Pulmonary_disease"
## [25] "CVA_TIA" "Renail_failure"
## [27] "Peripheral_vessel_disease" "Hypertension"
## [29] "Known_dislipidemia" "Diabetes_Mellitus"
## [31] "Current_Smoker" "Pack_years"
## [33] "Familial_MI" "FH_below_60"
## [35] "Heart_Failure" "SYNTAX"
## [37] "SYNTAX_quartiles" "SYNTAXBIN_classic"
## [39] "Added_SYNTAX" "TOTALSYNTAX_quartiles"
## [41] "TOTAL_SYNTAX" "CRP_Quartiles"
## [43] "Lab_CRP" "Glucose_Quartiles"
## [45] "Lab_glucose" "Lab_WBC"
## [47] "Hemoglobin_Quartiles" "Lab_heamogl"
## [49] "Lab_segm_neutro" "Lab__lymp"
## [51] "Lab_mono" "Lab_tot_chol"
## [53] "Lab_HDL_chol" "Lab_trigly"
## [55] "LDL_Quartiles" "Lab_LDL_chol"
## [57] "Angiog_descr_domin" "Angiog_num_diss_vesse"
## [59] "Total_occlus" "Collaterals"
## [61] "Prev_bypass" "Prev_stent"
## [63] "Prev_PTCA_angio" "Resid_sten"
## [65] "Death" "Angina"
## [67] "Myocard_inf" "Sec_perc_interv"
## [69] "Coro_bypass" "Cerbr_acci"
## [71] "Arrythmia_teat" "FW_Death"
## [73] "Angina_pectoris_fu" "Myocard_infarc_fu"
## [75] "Sec_perc_interv_fu" "Coronary_bypass_fu"
## [77] "Cerebro_vasc_accid" "Arrythmia"
## [79] "Blood_pressure_systolic_FU" "Blood_pressure_diastolic_FU"
## [81] "Death_date" "Cardiac_cause"
## [83] "event_score" "MACE_dichotomous"
## [85] "pe_betablock" "pe_calcium"
## [87] "pe_nitrate" "pe_ASAacetylz"
## [89] "pe_thromb" "pe_ADPclopidog"
## [91] "pe_ezeoverchol" "pe_ACE"
## [93] "pe_ATrecep" "pe_diurectica"
## [95] "pe_statines" "pe_antiarrhy"
## [97] "pe_other" "ap_nitrate"
## [99] "ap_ASAacetyl" "ap_clopido"
## [101] "dis_betablock" "dis_ASAacetyl"
## [103] "dis_ADPclopido" "dis_ACEinhi"
## [105] "dis_statines" "dis_othermed"
## [107] "Abs_cell_count_classical" "Classic_Quartile"
## [109] "Classic_Hi_Lo" "Abs_cell_count_Intmed"
## [111] "Intermediate_Quartile" "IntMed_Hi_Lo"
## [113] "Abs_cell_count_nonclass" "Non_Classic_Quartile"
## [115] "Non_Classic_Hi_Lo" "Class_perc_of_tot_monocyte_count"
## [117] "Class_Perc_Quartile" "IntMed_perc"
## [119] "Int_Perc_Quartile" "Non_Class_Perc"
## [121] "Non_Class_Perc_Quartile" "hsa_let_7f"
## [123] "hsa_let_7g" "hsa_let_7i"
## [125] "hsa_miR_103" "hsa_miR_106b"
## [127] "hsa_miR_124" "hsa_miR_126"
## [129] "hsa_miR_130b" "hsa_miR_138"
## [131] "hsa_miR_139_5p" "hsa_miR_140_5p"
## [133] "hsa_miR_142_3p" "hsa_miR_142_5p"
## [135] "hsa_miR_145" "hsa_miR_146a"
## [137] "hsa_miR_150" "hsa_miR_155"
## [139] "hsa_miR_15b" "hsa_miR_181a"
## [141] "hsa_miR_193a_5p" "hsa_miR_197"
## [143] "hsa_miR_19a" "hsa_miR_19b"
## [145] "hsa_miR_20a" "hsa_miR_20b"
## [147] "hsa_miR_21" "hsa_miR_212"
## [149] "hsa_miR_221" "hsa_miR_223"
## [151] "hsa_miR_26b" "hsa_miR_27a"
## [153] "hsa_miR_27b" "hsa_miR_28_5p"
## [155] "hsa_miR_29a" "hsa_miR_342_3p"
## [157] "hsa_miR_365" "hsa_miR_376c"
## [159] "hsa_miR_424" "hsa_miR_450b_5p"
## [161] "hsa_miR_451" "hsa_miR_486_3p"
## [163] "hsa_miR_574_3p" "hsa_miR_590_5p"
## [165] "hsa_miR_93" "hsa_miR_98"
## [167] "hsa_let_7b" "hsa_let_7d"
## [169] "hsa_let_7e" "hsa_miR_1"
## [171] "hsa_miR_100" "hsa_miR_10a"
## [173] "hsa_miR_125a_5p" "hsa_miR_125b"
## [175] "hsa_miR_128" "hsa_miR_130a"
## [177] "hsa_miR_132" "hsa_miR_133a"
## [179] "hsa_miR_133b" "hsa_miR_146b_5p"
## [181] "hsa_miR_17" "hsa_miR_18b"
## [183] "hsa_miR_191" "hsa_miR_193b"
## [185] "hsa_miR_195" "hsa_miR_200a"
## [187] "hsa_miR_210" "hsa_miR_218"
## [189] "hsa_miR_26a" "hsa_miR_28_3p"
## [191] "hsa_miR_31" "hsa_miR_32"
## [193] "hsa_miR_328" "hsa_miR_331_3p"
## [195] "hsa_miR_34a" "hsa_miR_345"
## [197] "hsa_miR_374b" "hsa_miR_422a"
## [199] "hsa_miR_423_5p" "hsa_miR_449a"
## [201] "hsa_miR_449b" "hsa_miR_487b"
## [203] "hsa_miR_491_5p" "hsa_miR_500"
## [205] "hsa_miR_501_5p" "hsa_miR_503"
## [207] "hsa_miR_628_5p" "hsa_miR_671_3p"
## [209] "hsa_miR_708" "hsa_miR_885_5p"
## [211] "hsa_miR_92a" "hsa_miR_99a"
## [213] "hsa_miR_99b"
##Split into response and NGC expression — split the data into clinical parameters and NGC expression
# Split the table into clinical/response variables (res) and miRNA expression
# features (ftrExprs). Columns are selected by the "hsa_" name prefix instead
# of the hard-coded positions 1:121 / 122:213, so the split stays correct if
# columns are ever added or reordered (per the column listing above, exactly
# columns 122-213 carry the "hsa_" prefix, so the result is unchanged).
mirCols <- grepl("^hsa_", names(ctmmMir))
res <- ctmmMir[, !mirCols]
row.names(res) <- ctmmMir$CC_number
ftrExprs <- ctmmMir[, mirCols]
row.names(ftrExprs) <- ctmmMir$CC_number
head(names(res))
## [1] "Sorting.index" "Numeric_Pat_code" "Hospital"
## [4] "CC_number" "Sex" "Race"
head(names(ftrExprs))
## [1] "hsa_let_7f" "hsa_let_7g" "hsa_let_7i" "hsa_miR_103"
## [5] "hsa_miR_106b" "hsa_miR_124"
##Descriptives of the data
Missing Values (Only one missing for confirmed diagnosis)
# Per-column count of missing values; report only columns that have any.
numNA <- vapply(ctmmMir, function(col) sum(is.na(col)), integer(1))
numNA[numNA > 0]
## Length_cm Weight
## 8 6
## bmi BMI_quartile
## 8 8
## Heart_rate Blood_Pressure_systolic
## 5 8
## Systolic_Quartile Diastolic_Quartile
## 8 8
## Blood_Pressure_diastolic Hypertension
## 8 1
## Known_dislipidemia Diabetes_Mellitus
## 1 2
## Current_Smoker Pack_years
## 1 50
## Familial_MI Heart_Failure
## 10 2
## SYNTAX SYNTAX_quartiles
## 179 179
## SYNTAXBIN_classic Added_SYNTAX
## 179 331
## TOTALSYNTAX_quartiles TOTAL_SYNTAX
## 179 179
## CRP_Quartiles Lab_CRP
## 123 123
## Glucose_Quartiles Lab_glucose
## 3 3
## Hemoglobin_Quartiles Lab_heamogl
## 19 19
## Lab_segm_neutro Lab__lymp
## 99 99
## Lab_mono Lab_tot_chol
## 100 3
## Lab_HDL_chol Lab_trigly
## 3 6
## LDL_Quartiles Lab_LDL_chol
## 3 3
## Angiog_descr_domin Angiog_num_diss_vesse
## 10 10
## Total_occlus Collaterals
## 37 38
## Prev_bypass Prev_stent
## 36 36
## Prev_PTCA_angio Resid_sten
## 38 78
## Death Angina
## 10 11
## Myocard_inf Sec_perc_interv
## 11 13
## Coro_bypass Cerbr_acci
## 11 11
## Arrythmia_teat FW_Death
## 11 40
## Angina_pectoris_fu Myocard_infarc_fu
## 42 42
## Sec_perc_interv_fu Coronary_bypass_fu
## 42 42
## Cerebro_vasc_accid Arrythmia
## 42 42
## Blood_pressure_systolic_FU Blood_pressure_diastolic_FU
## 258 258
## event_score MACE_dichotomous
## 21 11
## pe_betablock pe_calcium
## 14 14
## pe_nitrate pe_ASAacetylz
## 14 14
## pe_thromb pe_ADPclopidog
## 14 14
## pe_ezeoverchol pe_ACE
## 14 14
## pe_ATrecep pe_diurectica
## 14 14
## pe_statines pe_antiarrhy
## 14 14
## pe_other ap_nitrate
## 14 14
## ap_ASAacetyl ap_clopido
## 15 15
## dis_betablock dis_ASAacetyl
## 14 14
## dis_ADPclopido dis_ACEinhi
## 14 14
## dis_statines dis_othermed
## 14 14
## hsa_let_7f hsa_miR_124
## 1 15
## hsa_miR_138 hsa_miR_139_5p
## 139 13
## hsa_miR_142_3p hsa_miR_193a_5p
## 1 1
## hsa_miR_212 hsa_miR_223
## 10 1
## hsa_miR_27b hsa_miR_28_5p
## 1 1
## hsa_miR_376c hsa_miR_424
## 61 20
## hsa_miR_450b_5p hsa_miR_451
## 18 110
## hsa_miR_486_3p hsa_miR_98
## 2 1
## hsa_miR_1 hsa_miR_100
## 214 1
## hsa_miR_10a hsa_miR_125a_5p
## 30 20
## hsa_miR_125b hsa_miR_130a
## 16 1
## hsa_miR_133a hsa_miR_133b
## 113 309
## hsa_miR_18b hsa_miR_193b
## 176 9
## hsa_miR_200a hsa_miR_210
## 33 12
## hsa_miR_218 hsa_miR_31
## 158 117
## hsa_miR_32 hsa_miR_34a
## 3 23
## hsa_miR_374b hsa_miR_449a
## 7 134
## hsa_miR_449b hsa_miR_487b
## 116 127
## hsa_miR_491_5p hsa_miR_500
## 1 4
## hsa_miR_503 hsa_miR_708
## 148 11
## hsa_miR_885_5p hsa_miR_99a
## 3 1
## hsa_miR_99b
## 37
Different types of variables (continuous, categorical or descriptive)
# Classify each clinical column by its number of distinct values:
# fewer than 10 distinct values -> categorical; otherwise numeric columns are
# continuous and whatever is left counts as descriptive. vapply() is used
# instead of sapply() so the result type is guaranteed.
levelsVar <- vapply(res, function(x) nlevels(factor(x)), integer(1))
catVar <- levelsVar < 10
numVar <- !catVar & vapply(res, is.numeric, logical(1))
charVar <- !(catVar | numVar)
Categorical variables are:
names(res)[catVar]
## [1] "Hospital" "Sex"
## [3] "Race" "BMI_quartile"
## [5] "Systolic_Quartile" "Diastolic_Quartile"
## [7] "Pres_diag" "Conf_Diagn"
## [9] "NYHA" "Prev_HF"
## [11] "Prev_MI" "Prev_PTCA"
## [13] "Prev_CABG" "Pulmonary_disease"
## [15] "CVA_TIA" "Renail_failure"
## [17] "Peripheral_vessel_disease" "Hypertension"
## [19] "Known_dislipidemia" "Diabetes_Mellitus"
## [21] "Current_Smoker" "Familial_MI"
## [23] "FH_below_60" "Heart_Failure"
## [25] "SYNTAX_quartiles" "SYNTAXBIN_classic"
## [27] "TOTALSYNTAX_quartiles" "CRP_Quartiles"
## [29] "Glucose_Quartiles" "Hemoglobin_Quartiles"
## [31] "LDL_Quartiles" "Angiog_descr_domin"
## [33] "Angiog_num_diss_vesse" "Total_occlus"
## [35] "Collaterals" "Prev_bypass"
## [37] "Prev_stent" "Prev_PTCA_angio"
## [39] "Resid_sten" "Death"
## [41] "Angina" "Myocard_inf"
## [43] "Sec_perc_interv" "Coro_bypass"
## [45] "Cerbr_acci" "Arrythmia_teat"
## [47] "FW_Death" "Angina_pectoris_fu"
## [49] "Myocard_infarc_fu" "Sec_perc_interv_fu"
## [51] "Coronary_bypass_fu" "Cerebro_vasc_accid"
## [53] "Arrythmia" "Death_date"
## [55] "Cardiac_cause" "event_score"
## [57] "MACE_dichotomous" "pe_betablock"
## [59] "pe_calcium" "pe_nitrate"
## [61] "pe_ASAacetylz" "pe_thromb"
## [63] "pe_ADPclopidog" "pe_ezeoverchol"
## [65] "pe_ACE" "pe_ATrecep"
## [67] "pe_diurectica" "pe_statines"
## [69] "pe_antiarrhy" "pe_other"
## [71] "ap_nitrate" "ap_ASAacetyl"
## [73] "ap_clopido" "dis_betablock"
## [75] "dis_ASAacetyl" "dis_ADPclopido"
## [77] "dis_ACEinhi" "dis_statines"
## [79] "dis_othermed" "Classic_Quartile"
## [81] "Classic_Hi_Lo" "Intermediate_Quartile"
## [83] "IntMed_Hi_Lo" "Non_Classic_Quartile"
## [85] "Non_Classic_Hi_Lo" "Class_Perc_Quartile"
## [87] "Int_Perc_Quartile" "Non_Class_Perc_Quartile"
Continuous variables are:
names(res)[numVar]
## [1] "Sorting.index" "Numeric_Pat_code"
## [3] "CC_number" "age"
## [5] "Length_cm" "Weight"
## [7] "bmi" "Heart_rate"
## [9] "Blood_Pressure_systolic" "Blood_Pressure_diastolic"
## [11] "Pack_years" "SYNTAX"
## [13] "Added_SYNTAX" "TOTAL_SYNTAX"
## [15] "Lab_CRP" "Lab_glucose"
## [17] "Lab_WBC" "Lab_heamogl"
## [19] "Lab_segm_neutro" "Lab__lymp"
## [21] "Lab_mono" "Lab_tot_chol"
## [23] "Lab_HDL_chol" "Lab_trigly"
## [25] "Lab_LDL_chol" "Blood_pressure_systolic_FU"
## [27] "Blood_pressure_diastolic_FU" "Abs_cell_count_classical"
## [29] "Abs_cell_count_Intmed" "Abs_cell_count_nonclass"
## [31] "Class_perc_of_tot_monocyte_count" "IntMed_perc"
## [33] "Non_Class_Perc"
Descriptive variables are:
names(res)[charVar]
## character(0)
Tables of categorical variables
# One frequency table per categorical variable. lapply() (rather than sapply())
# guarantees a list result regardless of table lengths, and the redundant
# identity pass `lapply(catTables, function(x) x)` has been dropped --
# printing the list directly produces the same output.
catTables <- lapply(res[catVar], function(x) table(as.factor(x)))
catTables
## $Hospital
##
## 1 2 3 4
## 79 122 97 98
##
## $Sex
##
## 0 1
## 110 286
##
## $Race
##
## 1 2 3 4
## 379 7 4 6
##
## $BMI_quartile
##
## 1 2 3 4
## 97 101 99 91
##
## $Systolic_Quartile
##
## 1 2 3 4
## 85 101 105 97
##
## $Diastolic_Quartile
##
## 1 2 3 4
## 104 76 95 113
##
## $Pres_diag
##
## 0 1 2 3
## 10 312 30 44
##
## $Conf_Diagn
##
## 0 1 2 3 9
## 10 305 31 37 13
##
## $NYHA
##
## 1 2 3 4
## 276 78 27 15
##
## $Prev_HF
##
## 0 1
## 386 10
##
## $Prev_MI
##
## 0 1 2 9
## 284 98 12 2
##
## $Prev_PTCA
##
## 0 1 2 9
## 256 106 28 6
##
## $Prev_CABG
##
## 0 1 2
## 362 33 1
##
## $Pulmonary_disease
##
## 0 1
## 350 46
##
## $CVA_TIA
##
## 0 1
## 369 27
##
## $Renail_failure
##
## 0 1
## 390 6
##
## $Peripheral_vessel_disease
##
## 0 1 2 3 4 9
## 341 28 10 7 2 8
##
## $Hypertension
##
## 0 1
## 151 244
##
## $Known_dislipidemia
##
## 0 1
## 143 252
##
## $Diabetes_Mellitus
##
## 0 1
## 308 86
##
## $Current_Smoker
##
## 0 1
## 316 79
##
## $Familial_MI
##
## 0 1
## 234 152
##
## $FH_below_60
##
## 0 1
## 232 164
##
## $Heart_Failure
##
## 0 2 3
## 392 1 1
##
## $SYNTAX_quartiles
##
## 1 2 3 4
## 56 51 56 54
##
## $SYNTAXBIN_classic
##
## 1 2 3
## 182 27 8
##
## $TOTALSYNTAX_quartiles
##
## 1 2 3 4
## 59 49 55 54
##
## $CRP_Quartiles
##
## 1 2 3 4
## 66 87 83 37
##
## $Glucose_Quartiles
##
## 1 2 3 4
## 104 95 87 107
##
## $Hemoglobin_Quartiles
##
## 1 2 3 4
## 94 97 98 88
##
## $LDL_Quartiles
##
## 1 2 3 4
## 112 98 91 92
##
## $Angiog_descr_domin
##
## 0 1 2
## 30 48 308
##
## $Angiog_num_diss_vesse
##
## 0 1 2 3 9
## 8 139 131 83 25
##
## $Total_occlus
##
## 0 1
## 287 72
##
## $Collaterals
##
## 0 1
## 295 63
##
## $Prev_bypass
##
## 0 1
## 330 30
##
## $Prev_stent
##
## 0 1
## 240 120
##
## $Prev_PTCA_angio
##
## 0 1
## 337 21
##
## $Resid_sten
##
## 1 2 3
## 210 59 49
##
## $Death
##
## 0 1
## 385 1
##
## $Angina
##
## 0 1
## 374 11
##
## $Myocard_inf
##
## 0 1
## 382 3
##
## $Sec_perc_interv
##
## 0
## 383
##
## $Coro_bypass
##
## 0 1
## 374 11
##
## $Cerbr_acci
##
## 0
## 385
##
## $Arrythmia_teat
##
## 0 1
## 382 3
##
## $FW_Death
##
## 0 1
## 355 1
##
## $Angina_pectoris_fu
##
## 0 1
## 276 78
##
## $Myocard_infarc_fu
##
## 0 1
## 350 4
##
## $Sec_perc_interv_fu
##
## 0 1 9
## 320 25 9
##
## $Coronary_bypass_fu
##
## 0 1 9
## 334 19 1
##
## $Cerebro_vasc_accid
##
## 0 1
## 349 5
##
## $Arrythmia
##
## 0 1
## 342 12
##
## $Death_date
##
## 08/18/2010 2/3/2010
## 394 1 1
##
## $Cardiac_cause
##
## definite other cause
## 394 1 1
##
## $event_score
##
## 0 1 2 3 6 7 8
## 260 46 4 44 8 1 12
##
## $MACE_dichotomous
##
## 0 1
## 339 46
##
## $pe_betablock
##
## 0 1
## 113 269
##
## $pe_calcium
##
## 0 1 2
## 275 76 31
##
## $pe_nitrate
##
## 0 1
## 251 131
##
## $pe_ASAacetylz
##
## 0 1
## 79 303
##
## $pe_thromb
##
## 0 1 2
## 338 32 12
##
## $pe_ADPclopidog
##
## 0 1
## 197 185
##
## $pe_ezeoverchol
##
## 0 1 2
## 376 3 3
##
## $pe_ACE
##
## 0 1
## 257 125
##
## $pe_ATrecep
##
## 0 1
## 300 82
##
## $pe_diurectica
##
## 0 1 2 3
## 299 28 52 3
##
## $pe_statines
##
## 0 1
## 85 297
##
## $pe_antiarrhy
##
## 0 1
## 373 9
##
## $pe_other
##
## 0 1
## 326 56
##
## $ap_nitrate
##
## 0 1
## 125 257
##
## $ap_ASAacetyl
##
## 0 1
## 370 11
##
## $ap_clopido
##
## 0 1
## 347 34
##
## $dis_betablock
##
## 0 1
## 94 288
##
## $dis_ASAacetyl
##
## 0 1
## 60 322
##
## $dis_ADPclopido
##
## 0 1
## 106 276
##
## $dis_ACEinhi
##
## 0 1
## 179 203
##
## $dis_statines
##
## 0 1
## 60 322
##
## $dis_othermed
##
## 0 1
## 326 56
##
## $Classic_Quartile
##
## 1 2 3 4
## 99 96 104 97
##
## $Classic_Hi_Lo
##
## 0 1
## 195 201
##
## $Intermediate_Quartile
##
## 1 2 3 4
## 95 103 97 101
##
## $IntMed_Hi_Lo
##
## 0 1
## 198 198
##
## $Non_Classic_Quartile
##
## 1 2 3 4
## 103 91 100 102
##
## $Non_Classic_Hi_Lo
##
## 0 1
## 194 202
##
## $Class_Perc_Quartile
##
## 1 2 3 4
## 96 101 108 91
##
## $Int_Perc_Quartile
##
## 1 2 3 4
## 93 101 101 101
##
## $Non_Class_Perc_Quartile
##
## 1 2 3 4
## 97 102 98 99
Distributions of numeric variables
# Histograms of every continuous clinical variable (long format for faceting).
numVarGG <- melt(res[, numVar])
## No id variables; using all as measure variables
numDist <- ggplot(numVarGG) +
  geom_histogram(aes(x = value), bins = 15, na.rm = TRUE) +
  facet_wrap(~variable, ncol = 4, scales = "free")
numDist
##Distribution of Expression of NGC
# Histograms of each miRNA's expression values, one facet per miRNA.
ftrExprsGG <- melt(ftrExprs)
## No id variables; using all as measure variables
ftrDist <- ggplot(ftrExprsGG) +
  geom_histogram(aes(x = value), bins = 15) +
  facet_wrap(~variable, ncol = 7, scales = "free") +
  theme(axis.text.x = element_text(size = 8, angle = 45, hjust = 1, vjust = 1),
        axis.text.y = element_text(size = 8))
ftrDist
## Warning: Removed 2219 rows containing non-finite values (stat_bin).
Box plot of expression of all NGCs
# Side-by-side box plots of expression for all miRNAs.
ftrBox <- ggplot(ftrExprsGG, aes(x = variable, y = value)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(size = 6, angle = 45, hjust = 1, vjust = 1))
ftrBox
## Warning: Removed 2219 rows containing non-finite values (stat_boxplot).
#Data Imputation for NGC expression
# Impute missing miRNA expression values with caret's bagged-tree imputation,
# then apply the fitted imputer to the same data.
# NOTE(review): bagImpute is stochastic and no seed is set immediately before
# this step, so imputed values may differ between runs -- confirm whether
# exact reproducibility matters here.
ftrExprsImp <- preProcess(x = ftrExprs, method = "bagImpute")
ftrExprs <- predict(object = ftrExprsImp, newdata = ftrExprs)
#Data preparation for model fitting (confirmed diagnosis as the response) ##Remove NA values from the response
All diseased will be put in the same group
# Keep only patients with a valid confirmed diagnosis (codes 1-3); other codes
# and missing values are dropped from both features and clinical data.
validDiag <- ctmmMir$Conf_Diagn %in% 1:3
ftrExprsNarm <- ftrExprs[validDiag, ]
resNarm <- res[validDiag, ]
# Binary response: diagnosis 1 -> stable, diagnoses 2 and 3 -> unstable.
resNamed <- factor(ifelse(resNarm$Conf_Diagn == 1, 0, 1),
                   levels = c(0, 1),
                   labels = c("Stable_CVD", "Unstable_CVD"))
nrow(ftrExprsNarm)
## [1] 373
table(resNamed)
## resNamed
## Stable_CVD Unstable_CVD
## 305 68
##Violin plot of features vs response
A violin plot is more convenient here since the response is categorical. Mean and SD are indicated in red.
# Violin plot of each miRNA's expression split by response group; the red
# point-range marks mean +/- 1 SD.
ftrExprsNarmGG <- melt(ftrExprsNarm)
## No id variables; using all as measure variables
ftrExprsNarmGG$response <- resNamed
ftrResScat <- ggplot(ftrExprsNarmGG, aes(x = response, y = value)) +
  geom_violin() +
  stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1),
               geom = "pointrange", size = 0.3, color = "red") +
  facet_wrap(~variable, ncol = 11, scales = "free") +
  theme(axis.text.x = element_text(hjust = 1, vjust = 1, angle = 45))
ftrResScat
##Volcano plot
# Volcano plot: per-miRNA two-sample t-test (t.test default, Welch) of
# expression between the stable and unstable groups. Each t-test is computed
# exactly once per feature -- the original re-ran t.test() three times per
# column to extract the p-value and the two group means separately.
ttests <- lapply(ftrExprsNarm, function(x) t.test(x ~ resNamed))
ftrParam <- data.frame(Pvalue = -log10(vapply(ttests, function(tt) tt$p.value, numeric(1))))
# log2 ratio of group means: estimate[2] (Unstable) over estimate[1] (Stable)
ftrParam$FC <- log2(vapply(ttests, function(tt) tt$estimate[2] / tt$estimate[1], numeric(1)))
# Label (and color) only features with p < 0.05 and |log2 FC| > 0.0125
sig <- ftrParam$Pvalue > -log10(0.05) & abs(ftrParam$FC) > 0.0125
ftrParam$label <- NA
ftrParam$label[sig] <- row.names(ftrParam)[sig]
ftrParam$color <- as.numeric(!is.na(ftrParam$label)) + 1
ggplot(ftrParam) +
  geom_point(mapping = aes(x = FC, y = Pvalue), color = ftrParam$color) +
  geom_text(mapping = aes(x = FC, y = Pvalue, label = label), color = ftrParam$color,
            hjust = -0.1, vjust = 0.1, angle = 45, na.rm = TRUE, size = 3) +
  geom_hline(mapping = aes(yintercept = -log10(0.05)), linetype = 2) +
  geom_vline(xintercept = 0.0125, linetype = 2) +
  geom_vline(xintercept = -0.0125, linetype = 2) +
  coord_cartesian(xlim = c(-0.06, 0.06), ylim = c(0, 4)) +
  xlab("log(Fold Change)") +
  ylab("-log(p-value)")
##P-value and Expression Level Does the p-value rely on expression of guidance cues?
# Does the t-test p-value track mean expression level? Highlight (and label)
# features with p < pCut.
ftrParam$meanExprs <- vapply(ftrExprsNarm, mean, numeric(1))
pCut <- 0.2
passCut <- ftrParam$Pvalue > -log10(pCut)
ftrParam$labelExprs <- NA
ftrParam$labelExprs[passCut] <- row.names(ftrParam)[passCut]
ftrParam$colorExprs <- as.numeric(!is.na(ftrParam$labelExprs)) + 1
ggplot(ftrParam) +
  geom_point(aes(x = meanExprs, y = Pvalue), color = ftrParam$colorExprs) +
  geom_text(aes(x = meanExprs, y = Pvalue, label = labelExprs),
            color = ftrParam$colorExprs, hjust = -0.1, vjust = 0.1,
            angle = 45, na.rm = TRUE, size = 3) +
  geom_hline(yintercept = -log10(pCut), linetype = 2) +
  coord_cartesian(ylim = c(0, 3.5))
Distribution of -log10 p
# Distribution of the -log10 p-values across all miRNAs.
hist(ftrParam$Pvalue)
# Feature selection: keep miRNAs with p < pCut (0.2) for model fitting.
ftrIn <- ftrParam$Pvalue > -log10(pCut)
sum(ftrIn)
## [1] 14
ftrExprsIn <- ftrExprsNarm[,ftrIn]
names(ftrExprsIn)
## [1] "hsa_let_7f" "hsa_miR_130b" "hsa_miR_139_5p" "hsa_miR_145"
## [5] "hsa_miR_146a" "hsa_miR_197" "hsa_miR_342_3p" "hsa_miR_376c"
## [9] "hsa_miR_486_3p" "hsa_let_7e" "hsa_miR_10a" "hsa_miR_132"
## [13] "hsa_miR_133b" "hsa_miR_885_5p"
##Correlation between features
# Pairwise correlations among the selected features; caret::findCorrelation
# returns the indices of features exceeding the cutoff (none at |r| > 0.8).
ftrCorr <- cor(ftrExprsIn)
heatmap(ftrCorr, scale = "none", revC = TRUE)
findCorrelation(ftrCorr, cutoff = 0.8)
## integer(0)
##Filtering by correlations of all features
#Confounding factors
Age and Sex are very common confounding factors in biomarker studies. We examine them also here.
##Examining age
# Age distribution by outcome group, with the t-test p-value annotated.
agePval <- round(t.test(resNarm$age ~ resNamed)$p.value, 3)
ggplot(resNarm, aes(x = resNamed, y = age)) +
  geom_violin(color = "red") +
  geom_dotplot(binaxis = "y", dotsize = 0.5, stackdir = "center") +
  geom_text(aes(x = 1, y = 90, label = paste("p =", agePval)))
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
Age is not very different between the stable and unstable groups. Therefore, age is unlikely to be a confounding factor in this study.
# miRNA expression vs age with one linear fit per feature.
ftrExprsNarmGG$age <- resNarm$age
ggplot(ftrExprsNarmGG, aes(x = age, y = value)) +
  geom_point(size = 0.5) +
  geom_smooth(method = "lm") +
  facet_wrap(~variable, scales = "free", ncol = 7) +
  theme(axis.text.x = element_text(hjust = 1, vjust = 1, angle = 45, size = 6))
Age doesn’t correlate much with feature expression, so age is again unlikely to be a confounding factor in this modeling. But age will be included anyway to cancel any confounding effect.
##Examining sex
# Cross-tabulate sex against the outcome and test for association with a
# chi-squared test (0 = female, 1 = male in the raw coding).
sexRes <- data.frame(sex = factor(resNarm$Sex, 0:1, c("female", "male")))
sexRes$res <- resNamed
table(sexRes)
## res
## sex Stable_CVD Unstable_CVD
## female 79 20
## male 226 48
chisq.test(table(sexRes))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(sexRes)
## X-squared = 0.19441, df = 1, p-value = 0.6593
Sex is not significantly biased toward either group. Sex will also be included anyway to cancel any confounding effect.
# Expression per miRNA split by sex; red point-range marks mean +/- 1 SD.
ftrExprsNarmGG$Sex <- sexRes$sex
ggplot(ftrExprsNarmGG, aes(x = Sex, y = value)) +
  geom_violin() +
  stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1),
               geom = "pointrange", size = 0.3, color = "red") +
  facet_wrap(~variable, ncol = 7, scales = "free") +
  theme(axis.text.x = element_text(hjust = 1, vjust = 1, angle = 45))
#Data partition using the samples with a valid confirmed diagnostics, settings of model parameters
##Data partition
# Reproducible, stratified 80/20 train/test split on the response.
set.seed(4000)
trainingSamples <- createDataPartition(y = resNamed, p = 0.8, list = FALSE)
length(trainingSamples)
## [1] 299
# Response vectors for the training and hold-out sets
resTr <- resNamed[trainingSamples]
resTest <- resNamed[-trainingSamples]
table(resTr)
## resTr
## Stable_CVD Unstable_CVD
## 244 55
table(resTest)
## resTest
## Stable_CVD Unstable_CVD
## 61 13
#Age and Sex will be included
# Model feature table: age and Sex as covariates plus the selected miRNAs.
# Sex is wrapped in factor() and then mapped back to numeric via
# as.numeric(factor) - 1. NOTE(review): with Sex coded 0/1 and both values
# present this round-trip reproduces the original 0/1 coding -- confirm it is
# intentional rather than leftover from an earlier factor-based pipeline.
fea <- cbind(age = as.numeric(resNarm$age), Sex = factor(resNarm$Sex), ftrExprsIn)
fea$Sex <- as.numeric(fea$Sex) -1
# Split features to match the response partition
feaTr <- fea[trainingSamples, ]
feaTest <- fea[-trainingSamples, ]
feaTr[1:6, 1:6]
feaTest[1:6, 1:6]
##Model Training parameters (cross validation …)
10 fold cross-validation used
ctrl = trainControl(method = "repeatedcv", repeats = 10, classProbs = TRUE)
#Model Fitting
##Function for model report
# Summarise a fitted caret model in one call: top-10 variable-importance plot,
# confusion matrices for the training and test sets, a test-set ROC curve, and
# a confusion-matrix "table" rendered with ggplot text layers.
#
# model   : a fitted caret::train object (required).
# obsTr   : observed training-set classes (factor); defaults to global resTr.
# obsTest : observed test-set classes (factor); defaults to global resTest.
# newTr   : training-set features; defaults to global feaTr.
# newTest : test-set features; defaults to global feaTest.
#
# Returns a list: the model, the importance plot, the training and test
# confusionMatrix objects, the plotted ROC curve, and the confusion figure.
#
# Fixes vs the original: `decreasing = T` -> TRUE; `feaImp[1:10, ]` (NA rows
# when fewer than 10 predictors) -> head(feaImp, 10); the positive class is
# taken from the `obsTest` argument instead of the global `resTest`.
modelReport <- function(model = NULL, obsTr = resTr, obsTest = resTest, newTr = feaTr, newTest = feaTest)
{
  # Hard class predictions and class probabilities for both data sets
  predTr <- predict(object = model, newdata = newTr)
  predTest <- predict(object = model, newdata = newTest)
  predTrProb <- predict(object = model, newdata = newTr, type = "prob")
  predTestProb <- predict(object = model, newdata = newTest, type = "prob")
  # Variable importance, sorted descending; plot at most the top 10
  feaImp <- varImp(model)$importance
  feaImp$fea <- row.names(feaImp)
  colnames(feaImp)[1] <- "Imp"
  feaImp <- feaImp[order(feaImp$Imp, decreasing = TRUE), ]
  feaImp$fea <- factor(feaImp$fea, levels = row.names(feaImp))
  feaImpVis <- ggplot(data = head(feaImp, 10), mapping = aes(x = fea, y = Imp)) +
    geom_col() +
    xlab(label = NULL) +
    ylab(label = paste("Variable Importance", model$modelInfo$label)) +
    theme(
      axis.text.x = element_text(hjust = 1, vjust = 1, angle = 45)
    )
  # 7x12 grid of text positions for the confusion-matrix figure. Labels are
  # filled column by column; the placeholder strings below (e.g. "Training TN")
  # are overwritten with the actual counts/statistics further down.
  cmGG <- expand.grid(y = -1:-12, x = 1:7)
  cmGG$label <- c(NA, NA, "Training Set", "Prediction", "Stable_CVD", "Unstable_CVD",
                  NA, "Training Set", "Accuracy", "Sensitivity", "Specificity", "Kappa",
                  "Confusion Matrix and Statistics", NA, "Reference", "Stable_CVD", "Training TN", "Training FP",
                  NA, NA, "Training Accuracy", "Training Sensitivity", "Training Specificity", "Training Kappa",
                  NA, NA, NA, "Unstable_CVD", "Training FN", "Training TP",
                  NA, NA, NA, NA, NA, NA,
                  NA, NA, NA, NA, NA, NA,
                  NA, NA, NA, NA, NA, NA,
                  NA, NA, "Test Set", "Prediction", "Stable_CVD", "Unstable_CVD",
                  NA, "Test Set", "Accuracy", "Sensitivity", "Specificity", "Kappa",
                  NA, NA, "Reference", "Stable_CVD", "Test TN", "Test FP",
                  NA, NA, "Test Accuracy", "Test Sensitivity", "Test Specificity", "Test Kappa",
                  NA, NA, NA, "Unstable_CVD", "Test FN", "Test TP",
                  NA, NA, NA, NA, NA, NA)
  # Replace the hard-coded class names with the model's actual class levels
  cmGG$label[c(5, 16, 53, 64)] <- as.character(model$levels[1])
  cmGG$label[c(6, 28, 54, 76)] <- as.character(model$levels[2])
  # Confusion matrices; the second level of the observed test classes is the
  # positive ("Unstable") class
  cmTr <- confusionMatrix(data = predTr, reference = obsTr, positive = levels(obsTest)[2])
  cmTest <- confusionMatrix(data = predTest, reference = obsTest, positive = levels(obsTest)[2])
  # Fill in the TN / FP / FN / TP counts for the training set ...
  cmGG$label[17] <- cmTr$table[1,1]
  cmGG$label[18] <- cmTr$table[2,1]
  cmGG$label[29] <- cmTr$table[1,2]
  cmGG$label[30] <- cmTr$table[2,2]
  # ... and the test set
  cmGG$label[65] <- cmTest$table[1,1]
  cmGG$label[66] <- cmTest$table[2,1]
  cmGG$label[77] <- cmTest$table[1,2]
  cmGG$label[78] <- cmTest$table[2,2]
  # Accuracy / Sensitivity / Specificity / Kappa, training then test
  cmGG$label[21] <- round(cmTr$overall[1], 4)
  cmGG$label[22] <- round(cmTr$byClass[1], 4)
  cmGG$label[23] <- round(cmTr$byClass[2], 4)
  cmGG$label[24] <- round(cmTr$overall[2], 4)
  cmGG$label[69] <- round(cmTest$overall[1], 4)
  cmGG$label[70] <- round(cmTest$byClass[1], 4)
  cmGG$label[71] <- round(cmTest$byClass[2], 4)
  cmGG$label[72] <- round(cmTest$overall[2], 4)
  # Render the grid as a text-only figure, with segments drawing the borders
  cmPlot <- ggplot(cmGG) +
    geom_text(aes(x, y, label = label), hjust = 1, na.rm = TRUE) +
    coord_cartesian(xlim = c(0,8), ylim = c(0,-13)) +
    geom_segment(mapping = aes(x = -0.05, y = -2.5, xend = 7.1, yend = -2.5), size = 1) + #Top border
    geom_segment(mapping = aes(x = -0.05, y = -4.5, xend = 7.1, yend = -4.5)) + #Middle border
    geom_segment(mapping = aes(x = -0.05, y = -6.5, xend = 7.1, yend = -6.5), size = 1) + #Bottom border
    geom_segment(mapping = aes(x = 1.1, y = -3.5, xend = 3.1, yend = -3.5)) + #Left reference
    geom_segment(mapping = aes(x = 5.1, y = -3.5, xend = 7.1, yend = -3.5)) + #Right reference
    geom_segment(mapping = aes(x = -0.05, y = -7.5, xend = 2.1, yend = -7.5), size = 1) +
    geom_segment(mapping = aes(x = -0.05, y = -8.5, xend = 2.1, yend = -8.5)) +
    geom_segment(mapping = aes(x = -0.05, y = -12.5, xend = 2.1, yend = -12.5), size = 1) +
    geom_segment(mapping = aes(x = 4.1, y = -7.5, xend = 6.1, yend = -7.5), size = 1) +
    geom_segment(mapping = aes(x = 4.1, y = -8.5, xend = 6.1, yend = -8.5)) +
    geom_segment(mapping = aes(x = 4.1, y = -12.5, xend = 6.1, yend = -12.5), size = 1) +
    theme(line = element_blank(),
          text = element_blank(),
          title = element_blank(),
          plot.background = element_blank(),
          panel.background = element_blank()
    )
  # ROC on the test set using the probability of the first class; pROC prints
  # messages about the levels/direction it infers automatically
  rocCurve <- roc(response = obsTest, predictor = predTestProb[,1])
  return(list(
    model,
    feaImpVis,
    cmTr,
    cmTest,
    plot(rocCurve, legacy.axes = TRUE),
    cmPlot
  )
  )
}
##Boosted Logistic regression model
# Boosted logistic regression, tuned over nIter on Kappa; features are
# centered and scaled, and training uses all available cores.
registerDoParallel(detectCores())
ModelLogit <- train(x = feaTr, y = resTr,
                    method = "LogitBoost",
                    preProcess = c("center", "scale"),
                    trControl = ctrl,
                    metric = "Kappa",
                    tuneLength = 20)
stopImplicitCluster()
modelReport(model = ModelLogit)
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases
## [[1]]
## Boosted Logistic Regression
##
## 299 samples
## 16 predictor
## 2 classes: 'Stable_CVD', 'Unstable_CVD'
##
## Pre-processing: centered (16), scaled (16)
## Resampling: Cross-Validated (10 fold, repeated 10 times)
## Summary of sample sizes: 270, 268, 268, 270, 268, 268, ...
## Resampling results across tuning parameters:
##
## nIter Accuracy Kappa
## 11 0.7567527 0.05215187
## 21 0.7458762 0.07874250
## 31 0.7360871 0.06892466
## 41 0.7296863 0.04195305
## 51 0.7217468 0.02398651
## 61 0.7272785 0.03630190
## 71 0.7211835 0.04058436
## 81 0.7139444 0.01309314
## 91 0.7240945 0.02709477
## 101 0.7297887 0.04456887
## 111 0.7330423 0.04893949
## 121 0.7304968 0.04451947
## 131 0.7253726 0.02231876
## 141 0.7210879 0.01883440
## 151 0.7287942 0.03170506
## 161 0.7300393 0.04135351
## 171 0.7311850 0.03346902
## 181 0.7318050 0.03199882
## 191 0.7302503 0.02815371
## 201 0.7342733 0.04654186
##
## Kappa was used to select the optimal model using the largest value.
## The final value used for the model was nIter = 21.
##
## [[2]]
##
## [[3]]
## Confusion Matrix and Statistics
##
## Reference
## Prediction Stable_CVD Unstable_CVD
## Stable_CVD 236 30
## Unstable_CVD 8 25
##
## Accuracy : 0.8729
## 95% CI : (0.8298, 0.9085)
## No Information Rate : 0.8161
## P-Value [Acc > NIR] : 0.0052771
##
## Kappa : 0.4991
##
## Mcnemar's Test P-Value : 0.0006577
##
## Sensitivity : 0.45455
## Specificity : 0.96721
## Pos Pred Value : 0.75758
## Neg Pred Value : 0.88722
## Prevalence : 0.18395
## Detection Rate : 0.08361
## Detection Prevalence : 0.11037
## Balanced Accuracy : 0.71088
##
## 'Positive' Class : Unstable_CVD
##
##
## [[4]]
## Confusion Matrix and Statistics
##
## Reference
## Prediction Stable_CVD Unstable_CVD
## Stable_CVD 51 8
## Unstable_CVD 10 5
##
## Accuracy : 0.7568
## 95% CI : (0.6431, 0.849)
## No Information Rate : 0.8243
## P-Value [Acc > NIR] : 0.9486
##
## Kappa : 0.2081
##
## Mcnemar's Test P-Value : 0.8137
##
## Sensitivity : 0.38462
## Specificity : 0.83607
## Pos Pred Value : 0.33333
## Neg Pred Value : 0.86441
## Prevalence : 0.17568
## Detection Rate : 0.06757
## Detection Prevalence : 0.20270
## Balanced Accuracy : 0.61034
##
## 'Positive' Class : Unstable_CVD
##
##
## [[5]]
##
## Call:
## roc.default(response = obsTest, predictor = predTestProb[, 1])
##
## Data: predTestProb[, 1] in 61 controls (obsTest Stable_CVD) > 13 cases (obsTest Unstable_CVD).
## Area under the curve: 0.6885
##
## [[6]]
##Linear Discriminant Analysis
# Linear discriminant analysis on centered/scaled features (no tuning grid;
# LDA has no tunable parameters in caret).
ModelLda <- train(x = feaTr, y = resTr,
                  method = "lda",
                  preProcess = c("center", "scale"),
                  metric = "Kappa",
                  trControl = ctrl)
modelReport(model = ModelLda)
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases
## [[1]]
## Linear Discriminant Analysis
##
## 299 samples
## 16 predictor
## 2 classes: 'Stable_CVD', 'Unstable_CVD'
##
## Pre-processing: centered (16), scaled (16)
## Resampling: Cross-Validated (10 fold, repeated 10 times)
## Summary of sample sizes: 269, 269, 270, 270, 270, 268, ...
## Resampling results:
##
## Accuracy Kappa
## 0.8161772 0.07052771
##
##
## [[2]]
##
## [[3]]
## Confusion Matrix and Statistics
##
## Reference
## Prediction Stable_CVD Unstable_CVD
## Stable_CVD 242 51
## Unstable_CVD 2 4
##
## Accuracy : 0.8227
## 95% CI : (0.7746, 0.8643)
## No Information Rate : 0.8161
## P-Value [Acc > NIR] : 0.4173
##
## Kappa : 0.0985
##
## Mcnemar's Test P-Value : 4.301e-11
##
## Sensitivity : 0.07273
## Specificity : 0.99180
## Pos Pred Value : 0.66667
## Neg Pred Value : 0.82594
## Prevalence : 0.18395
## Detection Rate : 0.01338
## Detection Prevalence : 0.02007
## Balanced Accuracy : 0.53227
##
## 'Positive' Class : Unstable_CVD
##
##
## [[4]]
## Confusion Matrix and Statistics
##
## Reference
## Prediction Stable_CVD Unstable_CVD
## Stable_CVD 59 12
## Unstable_CVD 2 1
##
## Accuracy : 0.8108
## 95% CI : (0.703, 0.8925)
## No Information Rate : 0.8243
## P-Value [Acc > NIR] : 0.68580
##
## Kappa : 0.0633
##
## Mcnemar's Test P-Value : 0.01616
##
## Sensitivity : 0.07692
## Specificity : 0.96721
## Pos Pred Value : 0.33333
## Neg Pred Value : 0.83099
## Prevalence : 0.17568
## Detection Rate : 0.01351
## Detection Prevalence : 0.04054
## Balanced Accuracy : 0.52207
##
## 'Positive' Class : Unstable_CVD
##
##
## [[5]]
##
## Call:
## roc.default(response = obsTest, predictor = predTestProb[, 1])
##
## Data: predTestProb[, 1] in 61 controls (obsTest Stable_CVD) > 13 cases (obsTest Unstable_CVD).
## Area under the curve: 0.7251
##
## [[6]]
##K nearest Neighbours
## Tune a k-nearest-neighbours classifier over 10 candidate values of k,
## keeping the k with the best cross-validated kappa, then report it.
ModelKnn <- train(x = feaTr,
                  y = resTr,
                  method = "knn",
                  metric = "Kappa",
                  preProcess = c("center", "scale"),
                  tuneLength = 10,
                  trControl = ctrl)
modelReport(ModelKnn)
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases
## [[1]]
## k-Nearest Neighbors
##
## 299 samples
## 16 predictor
## 2 classes: 'Stable_CVD', 'Unstable_CVD'
##
## Pre-processing: centered (16), scaled (16)
## Resampling: Cross-Validated (10 fold, repeated 10 times)
## Summary of sample sizes: 269, 268, 270, 270, 268, 268, ...
## Resampling results across tuning parameters:
##
## k Accuracy Kappa
## 5 0.8045962 0.03880772
## 7 0.8125577 0.02740495
## 9 0.8182151 0.04088871
## 11 0.8198717 0.03345207
## 13 0.8185269 0.02036498
## 15 0.8185499 0.01853615
## 17 0.8162258 0.00000000
## 19 0.8162258 0.00000000
## 21 0.8162258 0.00000000
## 23 0.8162258 0.00000000
##
## Kappa was used to select the optimal model using the largest value.
## The final value used for the model was k = 9.
##
## [[2]]
##
## [[3]]
## Confusion Matrix and Statistics
##
## Reference
## Prediction Stable_CVD Unstable_CVD
## Stable_CVD 243 52
## Unstable_CVD 1 3
##
## Accuracy : 0.8227
## 95% CI : (0.7746, 0.8643)
## No Information Rate : 0.8161
## P-Value [Acc > NIR] : 0.4173
##
## Kappa : 0.0787
##
## Mcnemar's Test P-Value : 6.51e-12
##
## Sensitivity : 0.05455
## Specificity : 0.99590
## Pos Pred Value : 0.75000
## Neg Pred Value : 0.82373
## Prevalence : 0.18395
## Detection Rate : 0.01003
## Detection Prevalence : 0.01338
## Balanced Accuracy : 0.52522
##
## 'Positive' Class : Unstable_CVD
##
##
## [[4]]
## Confusion Matrix and Statistics
##
## Reference
## Prediction Stable_CVD Unstable_CVD
## Stable_CVD 60 13
## Unstable_CVD 1 0
##
## Accuracy : 0.8108
## 95% CI : (0.703, 0.8925)
## No Information Rate : 0.8243
## P-Value [Acc > NIR] : 0.685800
##
## Kappa : -0.0257
##
## Mcnemar's Test P-Value : 0.003283
##
## Sensitivity : 0.00000
## Specificity : 0.98361
## Pos Pred Value : 0.00000
## Neg Pred Value : 0.82192
## Prevalence : 0.17568
## Detection Rate : 0.00000
## Detection Prevalence : 0.01351
## Balanced Accuracy : 0.49180
##
## 'Positive' Class : Unstable_CVD
##
##
## [[5]]
##
## Call:
## roc.default(response = obsTest, predictor = predTestProb[, 1])
##
## Data: predTestProb[, 1] in 61 controls (obsTest Stable_CVD) > 13 cases (obsTest Unstable_CVD).
## Area under the curve: 0.6885
##
## [[6]]
##Naive Bayes
# Tuning grid for naive_bayes: Laplace smoothing amount, kernel density
# estimate vs. Gaussian likelihood, and kernel bandwidth adjustment.
# TRUE/FALSE spelled out: T/F are ordinary variables and can be masked.
nbGrid <- expand.grid(laplace = 1:3,
                      usekernel = c(TRUE, FALSE),
                      adjust = 1:3)
# Fit in parallel on all detected cores; the implicit cluster is shut
# down immediately after training so no workers are left running.
registerDoParallel(detectCores())
ModelNb <- train(
  x = feaTr,
  y = resTr,
  method = "naive_bayes",
  preProcess = c("center", "scale"),
  metric = "Kappa",
  trControl = ctrl,
  tuneGrid = nbGrid
)
stopImplicitCluster()
modelReport(model = ModelNb)
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases
## [[1]]
## Naive Bayes
##
## 299 samples
## 16 predictor
## 2 classes: 'Stable_CVD', 'Unstable_CVD'
##
## Pre-processing: centered (16), scaled (16)
## Resampling: Cross-Validated (10 fold, repeated 10 times)
## Summary of sample sizes: 269, 269, 270, 268, 269, 270, ...
## Resampling results across tuning parameters:
##
## laplace usekernel adjust Accuracy Kappa
## 1 FALSE 1 0.7796466 0.04972852
## 1 FALSE 2 0.7796466 0.04972852
## 1 FALSE 3 0.7796466 0.04972852
## 1 TRUE 1 0.7785091 0.07159505
## 1 TRUE 2 0.8010986 0.04359518
## 1 TRUE 3 0.8134954 0.03316732
## 2 FALSE 1 0.7796466 0.04972852
## 2 FALSE 2 0.7796466 0.04972852
## 2 FALSE 3 0.7796466 0.04972852
## 2 TRUE 1 0.7785091 0.07159505
## 2 TRUE 2 0.8010986 0.04359518
## 2 TRUE 3 0.8134954 0.03316732
## 3 FALSE 1 0.7796466 0.04972852
## 3 FALSE 2 0.7796466 0.04972852
## 3 FALSE 3 0.7796466 0.04972852
## 3 TRUE 1 0.7785091 0.07159505
## 3 TRUE 2 0.8010986 0.04359518
## 3 TRUE 3 0.8134954 0.03316732
##
## Kappa was used to select the optimal model using the largest value.
## The final values used for the model were laplace = 1, usekernel = TRUE
## and adjust = 1.
##
## [[2]]
##
## [[3]]
## Confusion Matrix and Statistics
##
## Reference
## Prediction Stable_CVD Unstable_CVD
## Stable_CVD 242 42
## Unstable_CVD 2 13
##
## Accuracy : 0.8528
## 95% CI : (0.8075, 0.891)
## No Information Rate : 0.8161
## P-Value [Acc > NIR] : 0.05557
##
## Kappa : 0.3176
##
## Mcnemar's Test P-Value : 4.116e-09
##
## Sensitivity : 0.23636
## Specificity : 0.99180
## Pos Pred Value : 0.86667
## Neg Pred Value : 0.85211
## Prevalence : 0.18395
## Detection Rate : 0.04348
## Detection Prevalence : 0.05017
## Balanced Accuracy : 0.61408
##
## 'Positive' Class : Unstable_CVD
##
##
## [[4]]
## Confusion Matrix and Statistics
##
## Reference
## Prediction Stable_CVD Unstable_CVD
## Stable_CVD 57 12
## Unstable_CVD 4 1
##
## Accuracy : 0.7838
## 95% CI : (0.6728, 0.8711)
## No Information Rate : 0.8243
## P-Value [Acc > NIR] : 0.85693
##
## Kappa : 0.015
##
## Mcnemar's Test P-Value : 0.08012
##
## Sensitivity : 0.07692
## Specificity : 0.93443
## Pos Pred Value : 0.20000
## Neg Pred Value : 0.82609
## Prevalence : 0.17568
## Detection Rate : 0.01351
## Detection Prevalence : 0.06757
## Balanced Accuracy : 0.50567
##
## 'Positive' Class : Unstable_CVD
##
##
## [[5]]
##
## Call:
## roc.default(response = obsTest, predictor = predTestProb[, 1])
##
## Data: predTestProb[, 1] in 61 controls (obsTest Stable_CVD) > 13 cases (obsTest Unstable_CVD).
## Area under the curve: 0.5851
##
## [[6]]
##Bagged CART
## Bagged classification trees (treebag has no tuning parameters);
## fitted in parallel, workers released straight after training.
registerDoParallel(detectCores())
ModelBagCart <- train(x = feaTr,
                      y = resTr,
                      method = "treebag",
                      metric = "Kappa",
                      preProcess = c("center", "scale"),
                      trControl = ctrl)
stopImplicitCluster()
modelReport(ModelBagCart)
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases
## [[1]]
## Bagged CART
##
## 299 samples
## 16 predictor
## 2 classes: 'Stable_CVD', 'Unstable_CVD'
##
## Pre-processing: centered (16), scaled (16)
## Resampling: Cross-Validated (10 fold, repeated 10 times)
## Summary of sample sizes: 270, 269, 268, 268, 268, 270, ...
## Resampling results:
##
## Accuracy Kappa
## 0.8041531 0.1031237
##
##
## [[2]]
##
## [[3]]
## Confusion Matrix and Statistics
##
## Reference
## Prediction Stable_CVD Unstable_CVD
## Stable_CVD 244 0
## Unstable_CVD 0 55
##
## Accuracy : 1
## 95% CI : (0.9877, 1)
## No Information Rate : 0.8161
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Sensitivity : 1.0000
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 1.0000
## Prevalence : 0.1839
## Detection Rate : 0.1839
## Detection Prevalence : 0.1839
## Balanced Accuracy : 1.0000
##
## 'Positive' Class : Unstable_CVD
##
##
## [[4]]
## Confusion Matrix and Statistics
##
## Reference
## Prediction Stable_CVD Unstable_CVD
## Stable_CVD 56 12
## Unstable_CVD 5 1
##
## Accuracy : 0.7703
## 95% CI : (0.6579, 0.8601)
## No Information Rate : 0.8243
## P-Value [Acc > NIR] : 0.9117
##
## Kappa : -0.0064
##
## Mcnemar's Test P-Value : 0.1456
##
## Sensitivity : 0.07692
## Specificity : 0.91803
## Pos Pred Value : 0.16667
## Neg Pred Value : 0.82353
## Prevalence : 0.17568
## Detection Rate : 0.01351
## Detection Prevalence : 0.08108
## Balanced Accuracy : 0.49748
##
## 'Positive' Class : Unstable_CVD
##
##
## [[5]]
##
## Call:
## roc.default(response = obsTest, predictor = predTestProb[, 1])
##
## Data: predTestProb[, 1] in 61 controls (obsTest Stable_CVD) > 13 cases (obsTest Unstable_CVD).
## Area under the curve: 0.6236
##
## [[6]]
##Random Forest
## Random forest: tune mtry over 10 candidate values, choosing the one
## with the best cross-validated kappa; fitted in parallel.
registerDoParallel(detectCores())
ModelRf <- train(x = feaTr,
                 y = resTr,
                 method = "rf",
                 metric = "Kappa",
                 preProcess = c("center", "scale"),
                 tuneLength = 10,
                 trControl = ctrl)
stopImplicitCluster()
modelReport(ModelRf)
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases
## [[1]]
## Random Forest
##
## 299 samples
## 16 predictor
## 2 classes: 'Stable_CVD', 'Unstable_CVD'
##
## Pre-processing: centered (16), scaled (16)
## Resampling: Cross-Validated (10 fold, repeated 10 times)
## Summary of sample sizes: 270, 269, 269, 268, 269, 269, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.8232510 0.07005336
## 3 0.8215284 0.07138923
## 5 0.8134364 0.06064995
## 6 0.8124587 0.05910540
## 8 0.8114357 0.06319430
## 9 0.8127912 0.07339022
## 11 0.8091009 0.06477150
## 12 0.8101238 0.07499867
## 14 0.8071009 0.06145380
## 16 0.8081346 0.07594190
##
## Kappa was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 16.
##
## [[2]]
##
## [[3]]
## Confusion Matrix and Statistics
##
## Reference
## Prediction Stable_CVD Unstable_CVD
## Stable_CVD 244 0
## Unstable_CVD 0 55
##
## Accuracy : 1
## 95% CI : (0.9877, 1)
## No Information Rate : 0.8161
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Sensitivity : 1.0000
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 1.0000
## Prevalence : 0.1839
## Detection Rate : 0.1839
## Detection Prevalence : 0.1839
## Balanced Accuracy : 1.0000
##
## 'Positive' Class : Unstable_CVD
##
##
## [[4]]
## Confusion Matrix and Statistics
##
## Reference
## Prediction Stable_CVD Unstable_CVD
## Stable_CVD 57 12
## Unstable_CVD 4 1
##
## Accuracy : 0.7838
## 95% CI : (0.6728, 0.8711)
## No Information Rate : 0.8243
## P-Value [Acc > NIR] : 0.85693
##
## Kappa : 0.015
##
## Mcnemar's Test P-Value : 0.08012
##
## Sensitivity : 0.07692
## Specificity : 0.93443
## Pos Pred Value : 0.20000
## Neg Pred Value : 0.82609
## Prevalence : 0.17568
## Detection Rate : 0.01351
## Detection Prevalence : 0.06757
## Balanced Accuracy : 0.50567
##
## 'Positive' Class : Unstable_CVD
##
##
## [[5]]
##
## Call:
## roc.default(response = obsTest, predictor = predTestProb[, 1])
##
## Data: predTestProb[, 1] in 61 controls (obsTest Stable_CVD) > 13 cases (obsTest Unstable_CVD).
## Area under the curve: 0.6828
##
## [[6]]
##Stochastic Gradient Boosting
# Tuning grid for stochastic gradient boosting: tree depth, number of
# boosting iterations, learning rate, and minimum terminal-node size.
gbmGrid <- expand.grid(interaction.depth = c(3, 5),
                       n.trees = c(100, 300, 500),
                       shrinkage = c(0.01, 0.1),
                       n.minobsinnode = c(5, 10))
registerDoParallel(detectCores())
ModelGbm <- train(
  x = feaTr,
  y = resTr,
  method = "gbm",
  preProcess = c("center", "scale"),
  trControl = ctrl,
  metric = "Kappa",
  tuneGrid = gbmGrid,
  # Forwarded through train()'s `...` to gbm(): suppress the
  # per-iteration deviance trace that otherwise floods the report.
  verbose = FALSE
)
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.9519 -nan 0.0100 -0.0001
## 2 0.9495 -nan 0.0100 0.0001
## 3 0.9468 -nan 0.0100 0.0002
## 4 0.9446 -nan 0.0100 0.0005
## 5 0.9432 -nan 0.0100 -0.0002
## 6 0.9413 -nan 0.0100 0.0003
## 7 0.9387 -nan 0.0100 0.0002
## 8 0.9360 -nan 0.0100 0.0000
## 9 0.9337 -nan 0.0100 0.0000
## 10 0.9318 -nan 0.0100 -0.0001
## 20 0.9117 -nan 0.0100 0.0002
## 40 0.8718 -nan 0.0100 0.0005
## 60 0.8410 -nan 0.0100 0.0000
## 80 0.8107 -nan 0.0100 -0.0001
## 100 0.7844 -nan 0.0100 0.0002
## 120 0.7615 -nan 0.0100 -0.0003
## 140 0.7394 -nan 0.0100 -0.0001
## 160 0.7174 -nan 0.0100 0.0001
## 180 0.6995 -nan 0.0100 -0.0003
## 200 0.6819 -nan 0.0100 -0.0000
## 220 0.6627 -nan 0.0100 -0.0000
## 240 0.6468 -nan 0.0100 -0.0004
## 260 0.6316 -nan 0.0100 -0.0002
## 280 0.6152 -nan 0.0100 -0.0004
## 300 0.6000 -nan 0.0100 -0.0003
## 320 0.5859 -nan 0.0100 -0.0002
## 340 0.5722 -nan 0.0100 -0.0003
## 360 0.5592 -nan 0.0100 -0.0003
## 380 0.5484 -nan 0.0100 -0.0007
## 400 0.5370 -nan 0.0100 -0.0002
## 420 0.5266 -nan 0.0100 -0.0001
## 440 0.5159 -nan 0.0100 -0.0000
## 460 0.5054 -nan 0.0100 -0.0001
## 480 0.4959 -nan 0.0100 -0.0002
## 500 0.4863 -nan 0.0100 -0.0003
# Release the parallel workers, then report CV/training/test performance.
stopImplicitCluster()
modelReport(ModelGbm)
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases
## [[1]]
## Stochastic Gradient Boosting
##
## 299 samples
## 16 predictor
## 2 classes: 'Stable_CVD', 'Unstable_CVD'
##
## Pre-processing: centered (16), scaled (16)
## Resampling: Cross-Validated (10 fold, repeated 10 times)
## Summary of sample sizes: 269, 269, 269, 269, 268, 270, ...
## Resampling results across tuning parameters:
##
## shrinkage interaction.depth n.minobsinnode n.trees Accuracy
## 0.01 3 5 100 0.8162258
## 0.01 3 5 300 0.8145491
## 0.01 3 5 500 0.8064112
## 0.01 3 10 100 0.8162258
## 0.01 3 10 300 0.8081568
## 0.01 3 10 500 0.8081461
## 0.01 5 5 100 0.8169155
## 0.01 5 5 300 0.8118472
## 0.01 5 5 500 0.8017093
## 0.01 5 10 100 0.8162258
## 0.01 5 10 300 0.8074687
## 0.01 5 10 500 0.8054557
## 0.10 3 5 100 0.7787197
## 0.10 3 5 300 0.7654983
## 0.10 3 5 500 0.7638877
## 0.10 3 10 100 0.7825977
## 0.10 3 10 300 0.7751594
## 0.10 3 10 500 0.7711809
## 0.10 5 5 100 0.7870753
## 0.10 5 5 300 0.7759933
## 0.10 5 5 500 0.7759473
## 0.10 5 10 100 0.7911357
## 0.10 5 10 300 0.7806967
## 0.10 5 10 500 0.7743052
## Kappa
## 0.000000000
## 0.071911789
## 0.082173775
## 0.000000000
## 0.033147587
## 0.073365107
## 0.005853659
## 0.072947095
## 0.068893186
## 0.000000000
## 0.038448072
## 0.068715408
## 0.054473658
## 0.038119087
## 0.039233275
## 0.046602867
## 0.051840044
## 0.049827129
## 0.065503828
## 0.058146645
## 0.061981704
## 0.060132406
## 0.049749924
## 0.043383832
##
## Kappa was used to select the optimal model using the largest value.
## The final values used for the model were n.trees = 500,
## interaction.depth = 3, shrinkage = 0.01 and n.minobsinnode = 5.
##
## [[2]]
##
## [[3]]
## Confusion Matrix and Statistics
##
## Reference
## Prediction Stable_CVD Unstable_CVD
## Stable_CVD 244 34
## Unstable_CVD 0 21
##
## Accuracy : 0.8863
## 95% CI : (0.8447, 0.92)
## No Information Rate : 0.8161
## P-Value [Acc > NIR] : 0.0006363
##
## Kappa : 0.502
##
## Mcnemar's Test P-Value : 1.519e-08
##
## Sensitivity : 0.38182
## Specificity : 1.00000
## Pos Pred Value : 1.00000
## Neg Pred Value : 0.87770
## Prevalence : 0.18395
## Detection Rate : 0.07023
## Detection Prevalence : 0.07023
## Balanced Accuracy : 0.69091
##
## 'Positive' Class : Unstable_CVD
##
##
## [[4]]
## Confusion Matrix and Statistics
##
## Reference
## Prediction Stable_CVD Unstable_CVD
## Stable_CVD 55 12
## Unstable_CVD 6 1
##
## Accuracy : 0.7568
## 95% CI : (0.6431, 0.849)
## No Information Rate : 0.8243
## P-Value [Acc > NIR] : 0.9486
##
## Kappa : -0.0262
##
## Mcnemar's Test P-Value : 0.2386
##
## Sensitivity : 0.07692
## Specificity : 0.90164
## Pos Pred Value : 0.14286
## Neg Pred Value : 0.82090
## Prevalence : 0.17568
## Detection Rate : 0.01351
## Detection Prevalence : 0.09459
## Balanced Accuracy : 0.48928
##
## 'Positive' Class : Unstable_CVD
##
##
## [[5]]
##
## Call:
## roc.default(response = obsTest, predictor = predTestProb[, 1])
##
## Data: predTestProb[, 1] in 61 controls (obsTest Stable_CVD) > 13 cases (obsTest Unstable_CVD).
## Area under the curve: 0.6772
##
## [[6]]
#Data summarizations
##Comparison of all tested models
##Information retrieval
# Summarise a fitted caret model: the resampling metrics of its winning
# tuning-parameter combination, plus training ("Tr_") and test ("Test_")
# Accuracy/Kappa and Sensitivity/Specificity, plus the test-set AUC.
#
# Args:
#   model: an object returned by caret::train().
# Returns:
#   A one-row matrix named after the model's label.
modelBestTune <- function(model = NULL)
{
  bestTuneParam <- as.data.frame(model$bestTune)
  resultTable <- model$results
  n <- ncol(bestTuneParam)
  # Align the bestTune columns with their order in model$results.
  bestTuneParam <- as.data.frame(bestTuneParam[, names(resultTable)[seq_len(n)]])
  # Logical mask of the $results rows matching every winning parameter.
  bestTune <- rep(TRUE, nrow(resultTable))
  for (i in seq_len(n)) {
    bestTune <- bestTune & (resultTable[, i] == bestTuneParam[1, i])
  }
  bestModel <- as.matrix(resultTable[bestTune, (n + 1):ncol(resultTable)])
  row.names(bestModel) <- model$modelInfo$label
  # modelReport(): [[3]] training confusion matrix, [[4]] test confusion
  # matrix, [[5]] test-set ROC (see its definition earlier in this file).
  reportBest <- modelReport(model)
  bestModel <- cbind(bestModel,
                     t(as.matrix(reportBest[[3]]$overall[1:2])),
                     t(as.matrix(reportBest[[3]]$byClass[1:2])),
                     t(as.matrix(reportBest[[4]]$overall[1:2])),
                     t(as.matrix(reportBest[[4]]$byClass[1:2])),
                     auc = as.numeric(reportBest[[5]]$auc))
  # Columns 5:8 / 9:12 assume exactly four resampling columns precede the
  # appended blocks (Accuracy, Kappa, AccuracySD, KappaSD) — TODO confirm
  # if a different summaryFunction is ever used.
  colnames(bestModel)[5:8] <- paste("Tr", colnames(bestModel)[5:8], sep = "_")
  colnames(bestModel)[9:12] <- paste("Test", colnames(bestModel)[9:12], sep = "_")
  return(bestModel)
}
# Collect every fitted caret model and stack their best-tune summaries
# into one matrix, one row per model. The pattern is anchored so only
# objects whose names *start* with "Model" (ModelLda, ModelRf, ...) are
# picked up, not any object that merely contains the string.
modelList <- mget(ls(pattern = "^Model"))
modelSum <- do.call(rbind, lapply(X = modelList, FUN = modelBestTune))
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases
## Setting levels: control = Stable_CVD, case = Unstable_CVD
## Setting direction: controls > cases
# Convert the summary matrix to a data frame and carry the model labels
# into an explicit column for plotting.
modelSum <- data.frame(modelSum)
modelSum$modelName <- rownames(modelSum)
##Names and abbreviations of Models
kable(cbind(Name = rownames(modelSum),
            Abbreviation = as.character(modelSum$modelName)))
| Name | Abbreviation |
|---|---|
| Bagged CART | Bagged CART |
| Stochastic Gradient Boosting | Stochastic Gradient Boosting |
| k-Nearest Neighbors | k-Nearest Neighbors |
| Linear Discriminant Analysis | Linear Discriminant Analysis |
| Boosted Logistic Regression | Boosted Logistic Regression |
| Naive Bayes | Naive Bayes |
| Random Forest | Random Forest |
##Accuracy in Cross Validation/Training/Test
## Accuracy per model: CV mean ± SD, training, and test estimates.
## Dotted line at 282/345 — presumably the no-information rate used as a
## reference; confirm against the cohort counts.
ggplot(data = modelSum) +
  geom_point(aes(x = modelName, y = Accuracy, color = "cv"), size = 2) +
  geom_errorbar(aes(x = modelName,
                    ymin = Accuracy - AccuracySD,
                    ymax = Accuracy + AccuracySD,
                    color = "cv"),
                width = 0.3, show.legend = FALSE) +
  geom_point(aes(x = modelName, y = Tr_Accuracy, color = "Training"), size = 2) +
  geom_point(aes(x = modelName, y = Test_Accuracy, color = "Test"), size = 2) +
  geom_hline(yintercept = 282/345, linetype = 3) +
  xlab("Models") +
  ylab("Accuracy") +
  coord_flip() +
  theme_bw()
##Kappa
## Kappa per model: CV mean ± SD, training, and test estimates.
## show.legend = FALSE on the errorbar for consistency with the accuracy
## plot above (avoids the errorbar glyph overprinting the legend keys).
ggplot(data = modelSum) +
  geom_point(mapping = aes(x = modelName, y = Kappa, color = "cv"), size = 2) +
  geom_errorbar(mapping = aes(x = modelName,
                              ymax = Kappa + KappaSD,
                              ymin = Kappa - KappaSD,
                              color = "cv"
                              ), width = 0.3, show.legend = FALSE) +
  geom_point(mapping = aes(x = modelName, y = Tr_Kappa, color = "Training"), size = 2) +
  geom_point(mapping = aes(x = modelName, y = Test_Kappa, color = "Test"), size = 2) +
  xlab("Models") +
  ylab("Kappa") +
  coord_flip() +
  theme_bw()
##Sensitivity
## Training vs test sensitivity for each model.
ggplot(data = modelSum) +
  geom_point(aes(x = modelName, y = Tr_Sensitivity,
                 color = "Sensitivity Training"), size = 2) +
  geom_point(aes(x = modelName, y = Test_Sensitivity,
                 color = "Sensitivity Test"), size = 2) +
  xlab("Models") +
  ylab("Sensitivity") +
  coord_flip() +
  theme_bw()
##Specificity
## Training vs test specificity for each model.
ggplot(data = modelSum) +
  geom_point(aes(x = modelName, y = Tr_Specificity,
                 color = "Specificity Training"), size = 2) +
  geom_point(aes(x = modelName, y = Test_Specificity,
                 color = "Specificity Test"), size = 2) +
  xlab("Models") +
  ylab("Specificity") +
  coord_flip() +
  theme_bw()
##The Specificity-Sensitivity Plot
## ROC-style scatter: each model plotted at (1 - specificity, sensitivity)
## for training and test; the dotted diagonal marks chance performance.
ggplot(data = modelSum) +
  geom_point(aes(x = 1 - Tr_Specificity, y = Tr_Sensitivity,
                 color = "Training"), size = 2) +
  geom_point(aes(x = 1 - Test_Specificity, y = Test_Sensitivity,
                 color = "Test"), size = 2) +
  geom_text(aes(x = 1 - Tr_Specificity, y = Tr_Sensitivity,
                color = "Training", label = modelName),
            size = 3, angle = 45, hjust = 0, show.legend = FALSE) +
  geom_text(aes(x = 1 - Test_Specificity, y = Test_Sensitivity,
                color = "Test", label = modelName),
            size = 3, angle = 45, hjust = 0, show.legend = FALSE) +
  xlab("1 - Specificity") +
  ylab("Sensitivity") +
  coord_cartesian(xlim = c(0, 1)) +
  geom_abline(slope = 1, linetype = 3) +
  theme_bw()
##Area under curve
## Test-set area under the ROC curve for each model.
ggplot(data = modelSum) +
  geom_point(aes(x = modelName, y = auc), size = 2) +
  xlab("Models") +
  ylab("Area Under ROC curve") +
  coord_flip() +
  theme_bw()
#Conclusion
None of the above models performs satisfactorily, meaning that the features are weak predictors of stable versus unstable CVD. GBM performs better than the other models, with the best area under the ROC curve and the best sensitivity in the test dataset.
#Session info
# Persist the entire workspace (fitted models included) for later reuse.
save.image(file = "./Rdata/Mir_Stable_NonStable_new.RData")
# Record the R and package versions used, for reproducibility.
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-conda_cos6-linux-gnu (64-bit)
## Running under: Ubuntu 19.04
##
## Matrix products: default
## BLAS/LAPACK: /home/huayu/anaconda3/envs/rstudio/lib/R/lib/libRblas.so
##
## locale:
## [1] en_GB.UTF-8
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] gbm_2.1.5 randomForest_4.6-14 e1071_1.7-2
## [4] plyr_1.8.4 ipred_0.9-9 naivebayes_0.9.6
## [7] MASS_7.3-51.4 caTools_1.17.1.2 pROC_1.15.0
## [10] doParallel_1.0.15 iterators_1.0.12 foreach_1.4.7
## [13] caret_6.0-84 lattice_0.20-38 reshape2_1.4.3
## [16] ggplot2_3.2.0 knitr_1.23
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.2 lubridate_1.7.4 class_7.3-15
## [4] assertthat_0.2.1 digest_0.6.20 R6_2.4.0
## [7] backports_1.1.4 acepack_1.4.1 stats4_3.5.1
## [10] evaluate_0.14 highr_0.8 pillar_1.4.2
## [13] rlang_0.4.0 lazyeval_0.2.2 rstudioapi_0.10
## [16] data.table_1.12.2 rpart_4.1-15 Matrix_1.2-17
## [19] checkmate_1.9.4 rmarkdown_1.14 labeling_0.3
## [22] splines_3.5.1 foreign_0.8-71 gower_0.2.1
## [25] stringr_1.4.0 htmlwidgets_1.3 munsell_0.5.0
## [28] compiler_3.5.1 xfun_0.8 base64enc_0.1-3
## [31] pkgconfig_2.0.2 htmltools_0.3.6 nnet_7.3-12
## [34] tidyselect_0.2.5 htmlTable_1.13.1 tibble_2.1.3
## [37] gridExtra_2.3 prodlim_2018.04.18 Hmisc_4.2-0
## [40] codetools_0.2-16 crayon_1.3.4 dplyr_0.8.3
## [43] withr_2.1.2 bitops_1.0-6 recipes_0.1.6
## [46] ModelMetrics_1.2.2 grid_3.5.1 jsonlite_1.6
## [49] nlme_3.1-140 gtable_0.3.0 magrittr_1.5
## [52] scales_1.0.0 stringi_1.4.3 latticeExtra_0.6-28
## [55] timeDate_3043.102 generics_0.0.2 Formula_1.2-3
## [58] lava_1.6.5 RColorBrewer_1.1-2 tools_3.5.1
## [61] glue_1.3.1 purrr_0.3.2 survival_2.44-1.1
## [64] yaml_2.2.0 colorspace_1.4-1 cluster_2.1.0